In [1]:
# egrep.py
import sys, re

# sys.argv is the list of command-line arguments
# sys.argv[0] is the name of the program itself
# sys.argv[1] will be the regex specified at the command line
regex = sys.argv[1]

# for every line passed into the script
for line in sys.stdin:
    # if it matches the regex, write it to stdout
    if re.search(regex, line):
        sys.stdout.write(line)
In [9]:
# line_count.py
import sys

count = 0
for line in sys.stdin:
    count += 1

# print goes to sys.stdout
print(count)
In [7]:
!type SomeFile.txt | python egrep.py "[0-9]" | python line_count.py
# 'type' is the Windows equivalent of cat; on Unix you'd write:
# cat SomeFile.txt | python egrep.py "[0-9]" | python line_count.py
In [10]:
# 'r' means read-only
file_for_reading = open('reading_file.txt', 'r')
# 'w' is write; it will destroy the file if it already exists!
file_for_writing = open('writing_file.txt', 'w')
# 'a' is append, for adding to the end of the file
file_for_appending = open('appending_file.txt', 'a')
# don't forget to close your files when you're done
file_for_reading.close()
file_for_writing.close()
file_for_appending.close()
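Closing by hand is fragile: if an exception is raised before close runs, the file handle leaks. A minimal defensive sketch, reusing writing_file.txt from above, wraps the work in try/finally:
In [ ]:
# a sketch: guarantee the close even if the work in between raises
f = open('writing_file.txt', 'w')
try:
    f.write("hello data science\n")
finally:
    f.close()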
Use a with block to ensure that files are closed:
In [15]:
with open('SomeFile.txt', 'r') as f:
    for line in f:
        print(line.strip())
# after the with block exits, the file is closed
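As a sketch that combines a with block with the re module, here is a count of how many lines in a hypothetical input.txt start with '#':
In [ ]:
import re

starts_with_hash = 0
with open('input.txt', 'r') as f:       # 'input.txt' is hypothetical
    for line in f:                      # look at each line in the file
        if re.match("^#", line):        # does it start with '#'?
            starts_with_hash += 1       # if so, add 1 to the count
print(starts_with_hash)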
In [64]:
import csv
with open('stocks.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
    for row in reader:
        date = row[0]
        symbol = row[1]
        closing_price = float(row[2])
        print(date, symbol, closing_price)
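csv.writer goes the other direction and handles quoting and escaping for you. A minimal sketch, with made-up prices and an output filename chosen just for illustration:
In [ ]:
import csv

today_prices = {'AAPL': 90.91, 'MSFT': 41.68, 'FB': 64.5}   # made-up prices
with open('comma_delimited_stock_prices.txt', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',')
    for stock, price in today_prices.items():
        writer.writerow([stock, price])   # one delimited row per stock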
In [65]:
with open('stocks-headers.csv', 'r') as f:
    # the file's first row is a header; its fields are colon-delimited
    reader = csv.DictReader(f, delimiter=':')
    for row in reader:
        date = row['date']
        symbol = row['symbol']
        closing_price = float(row['closing_price'])
        print(date, symbol, closing_price)
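DictReader also works when a file has no header row: supply the column names yourself with the fieldnames parameter. A sketch that rereads the headerless stocks.csv from above:
In [ ]:
import csv

with open('stocks.csv', 'r') as f:
    # no header row, so pass the field names explicitly
    reader = csv.DictReader(f, fieldnames=['date', 'symbol', 'closing_price'])
    for row in reader:
        print(row['date'], row['symbol'], float(row['closing_price']))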
In [45]:
some_html = """
<html>
<head>
<title>A web page</title>
</head>
<body>
<p id="author">Joel Grus</p>
<p id="subject" class="important">Data Science</p>
</body>
</html>
"""
In [46]:
from bs4 import BeautifulSoup
import requests

# in real life you'd fetch the HTML from the web like this:
html = requests.get('http://www.example.com').text
# but for a reproducible example we'll parse the snippet above instead:
html = some_html
soup = BeautifulSoup(html, 'html5lib')   # the html5lib parser must be installed
In [47]:
first_paragraph = soup.find('p')   # or just soup.p: the first <p> tag
first_paragraph
In [48]:
soup.p.text, soup.p.text.split()   # ('Joel Grus', ['Joel', 'Grus'])
In [49]:
soup.p['id']   # 'author'; raises KeyError if there's no 'id' attribute
In [50]:
soup.p.get('id')   # 'author'; returns None if there's no 'id' attribute
In [51]:
soup.find_all('p')   # or just soup('p'): all <p> tags
In [52]:
[p for p in soup('p') if p.get('id')]   # only <p> tags with an 'id' attribute
In [53]:
soup('p', {'class' : 'important'})   # <p> tags whose class includes 'important'
In [54]:
soup('p', 'important')   # shorthand for the same query
In [56]:
[p for p in soup('p') if 'important' in p.get('class', [])]   # the explicit version
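These pieces combine into more elaborate queries. For example, here is a sketch that collects every <span> inside a <div>; nothing in some_html matches, but it is a common pattern on real pages:
In [ ]:
# warning: returns the same <span> multiple times
# if it sits inside several nested <div>s
spans_inside_divs = [span
                     for div in soup('div')     # for each <div> on the page
                     for span in div('span')]   # find each <span> inside it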
In [59]:
import json
json_string = """{ "title" : "Data Science Book",
                   "author" : "Joel Grus",
                   "publicationYear" : 2014,
                   "topics" : [ "data", "science", "data science"] }"""

# parse the JSON into a Python dictionary
# (don't name it 'dict': that would shadow the built-in type)
deserialized = json.loads(json_string)
if 'data science' in deserialized['topics']:
    print(deserialized)
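json.dumps goes the other direction, serializing a Python object back into a JSON string:
In [ ]:
# serialize the dictionary back to a JSON string
json.dumps(deserialized)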
In [ ]:
# hit the GitHub API for a user's public repos
endpoint = 'https://api.github.com/users/joelgrus/repos'
# the response body is JSON; parse it into a list of dicts, one per repo
repos = json.loads(requests.get(endpoint).text)
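repos is now a list of dicts, one per repository. As a sketch of what you might do next, and assuming the request succeeded, you could tally each repo's 'language' field (which the GitHub API includes) with a Counter:
In [ ]:
from collections import Counter

# count the primary language of each repo ('language' may be None)
language_counts = Counter(repo.get('language') for repo in repos)
print(language_counts.most_common(5))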